(ns metabase-enterprise.semantic-search.index-metadata
  "Multi-index management for pgvector semantic search.

  Tracks multiple embedding model indexes through metadata tables, supporting model switching
  without data loss. Each embedding model (provider/name/dimensions) gets its own index table,
  with a control table managing which index is currently active."
  (:require
   [honey.sql :as sql]
   [honey.sql.helpers :as sql.helpers]
   ;; TODO: extract schema code to go under db.migration
   [metabase-enterprise.semantic-search.index :as semantic.index]
   [metabase-enterprise.semantic-search.util :as semantic.util]
   [metabase.util :as u]
   [metabase.util.log :as log]
   [next.jdbc :as jdbc]
   [next.jdbc.result-set :as jdbc.rs]))

(set! *warn-on-reflection* true)

;; metadata and control tables allow multiple indexes to coexist
;; while maintaining a single active pointer.
;; This enables model switching without data loss - old indexes remain available and can still be targeted with index functions.
;; Garbage / pending / switchover policy beyond 'one is active' TBD, right now we want the active index to reflect the global
;; embedding configuration in metabase settings / envars.
(def ^:private schema-0
  "The version 0 schema for an index metadata system. This acts as a 'root' for indexes, and assumes a single set of documents
  that will be indexed into multiple indexes (for different embedding models and so on)."
  {:metadata
   [[:id :bigint [:primary-key] [:raw "GENERATED BY DEFAULT AS IDENTITY"]]
    [:provider :text :not-null]
    [:model_name :text :not-null]
    [:vector_dimensions :int :not-null]
    [:table_name :text :not-null :unique]
    [:index_version :int :not-null]
    [:index_created_at :timestamp-with-time-zone :not-null]
    [:indexer_last_poll :timestamp-with-time-zone :null]
    [:indexer_last_seen :timestamp-with-time-zone :null]
    [:indexer_last_seen_id :text :null]
    [:indexer_last_seen_hash :text :null]
    [:indexer_stalled_at :timestamp-with-time-zone :null]]

   :control
   [[:id :bigint [:primary-key]] ;; not auto-inc, only one row - still useful to ensure only one row when inserting.
    [:version :text :not-null]
    [:active_id :int :null]
    [:active_updated_at :timestamp-with-time-zone :null]]

   :gate
   [[:id :text :not-null :primary-key]
    [:model :text :not-null]
    [:model_id :text :not-null]
    [:updated_at :timestamp-with-time-zone :not-null]
    [:gated_at   :timestamp-with-time-zone :not-null :default [:clock_timestamp]]
    ;; null signals a delete
    [:document :jsonb :null]
    ;; assumes some text encoding. 'TEXT' is just so convenient to get that sweet equality.
    ;; the actual hash encoding is pretty much irrelevant as long as nodes agree on it most of the time.
    ;; even if they do not, as a redundancy filtering optimisation - nothing too bad happens.
    [:document_hash :text :null]]})

(defn qualify-index
  "Qualifies an index-map, returning a new index-map that might have additional disambiguating prefixes
   applied to table names.

  This ensures tables/constraints created as part of operating a meta index can be isolated within
  the same database."
  [index index-metadata]
  ;; if created databases for namespacing in tests we could probably remove this whole idea.
  (let [{:keys [index-table-qualifier]} index-metadata
        qualify (fn [table-name]
                  (format index-table-qualifier table-name))]
    (-> index
        (update :table-name qualify))))

(def default-index-metadata
  "The default index metadata configuration that will be used for the search engine integration."
  {:version                   "2"
   :metadata-table-name       "index_metadata"
   :control-table-name        "index_control"
   :gate-table-name           "index_gate"
   :index-table-qualifier     "%s"})

(defn- create-index-metadata-table-if-not-exists-sql [index-metadata]
  (let [{:keys [metadata-table-name]} index-metadata
        schema (:metadata schema-0)]
    (-> (sql.helpers/create-table (keyword metadata-table-name) :if-not-exists)
        (sql.helpers/with-columns schema)
        (sql/format :quoted true))))

(defn- create-control-table-if-not-exists-sql [index-metadata]
  (let [{:keys [control-table-name]} index-metadata
        schema (:control schema-0)]
    (-> (sql.helpers/create-table (keyword control-table-name) :if-not-exists)
        (sql.helpers/with-columns schema)
        (sql/format :quoted true))))

(defn- create-gate-table-if-not-exists-sql [index-metadata]
  (let [{:keys [gate-table-name]} index-metadata
        schema (:gate schema-0)]
    (-> (sql.helpers/create-table (keyword gate-table-name) :if-not-exists)
        (sql.helpers/with-columns schema)
        (sql/format :quoted true))))

(comment
  (create-index-metadata-table-if-not-exists-sql default-index-metadata)
  (create-control-table-if-not-exists-sql default-index-metadata)
  (create-gate-table-if-not-exists-sql default-index-metadata))

(defn ensure-control-row-exists!
  "Ensures the singleton control row exists in the control table.
  Idempotent - uses ON CONFLICT DO NOTHING to avoid duplicate key errors."
  [pgvector index-metadata]
  (let [{:keys [control-table-name version]} index-metadata]
    (jdbc/execute!
     pgvector
     (-> (sql.helpers/insert-into (keyword control-table-name))
         ;; Control table uses hardcoded id=0 to enforce a singleton row.
         ;; This ensures exactly one control row exists, simplifying active index lookups
         ;; and avoiding race conditions when multiple nodes initialize simultaneously.
         (sql.helpers/values [{:id                0
                               :version           version
                               :active_id         nil
                               :active_updated_at nil}])
         (sql.helpers/on-conflict :id)
         (sql.helpers/do-nothing)
         (sql/format :quoted true)))
    nil))

(defn create-tables-if-not-exists!
  "Creates the metadata and control tables if they do not already exist"
  [pgvector index-metadata]
  (let [{:keys [metadata-table-name control-table-name gate-table-name version]} index-metadata]
    ;; **note** we do not currently deal with version mismatches as there is only one version.
    ;; metadata
    (log/infof "Creating metadata table if not exists %s (%s)" metadata-table-name version)
    (jdbc/execute!
     pgvector
     (create-index-metadata-table-if-not-exists-sql index-metadata))
    ;; control
    (log/infof "Creating control table if not exists %s (%s)" control-table-name version)
    (jdbc/execute!
     pgvector
     (create-control-table-if-not-exists-sql index-metadata))
    ;; gate
    (log/infof "Creating gate table if not exists %s (%s)" gate-table-name version)
    (jdbc/execute!
     pgvector
     (create-gate-table-if-not-exists-sql index-metadata))
    (log/info "Creating gate table gated_at index if not exists")
    (jdbc/execute!
     pgvector
     (sql/format
      (sql.helpers/create-index
       [(keyword (str (:gate-table-name index-metadata) "_gated_at")) :if-not-exists]
       [(keyword (:gate-table-name index-metadata)) :gated_at :id])
      :quoted true))
    (log/info "Creating gate table tombstone cleanup index if not exists")
    ;; Partial index on nil documents in the gate table to optimize identification of old tombstones
    (jdbc/execute!
     pgvector
     (sql/format
      {:create-index
       [[(keyword (str (:gate-table-name index-metadata) "_tombstone_cleanup")) :if-not-exists]
        [(keyword (:gate-table-name index-metadata)) :gated_at]]
       :where [:and [:= :document nil] [:= :document_hash nil]]}
      :quoted true))
    nil))

(defn drop-tables-if-exists!
  "Drops both metadata and control tables if they exist.
  Used for cleanup in tests and development - destructive operation."
  [pgvector index-metadata]
  ;; we could move this to test.util as it is currently redundant in prod. I like it being here for locality and later
  ;; we will probably use it trigger 'start fresh', even on deployed nodes.
  (let [{:keys [metadata-table-name
                control-table-name
                gate-table-name]}
        index-metadata]
    (jdbc/execute!
     pgvector
     (-> (sql.helpers/drop-table :if-exists (keyword metadata-table-name))
         (sql/format :quoted true)))
    (jdbc/execute!
     pgvector
     (-> (sql.helpers/drop-table :if-exists (keyword control-table-name))
         (sql/format :quoted true)))
    (jdbc/execute!
     pgvector
     (-> (sql.helpers/drop-table :if-exists (keyword gate-table-name))
         (sql/format :quoted true)))
    nil))

(defn- row->index [{:keys [provider
                           model_name
                           vector_dimensions
                           table_name
                           index_version]}]
  {:embedding-model  {:provider          provider
                      :model-name        model_name
                      :vector-dimensions vector_dimensions}
   :table-name       table_name
   :version          index_version})

(defn- index-table-exists? [pgvector index]
  (semantic.util/table-exists? pgvector (:table-name index)))

(defn- control-and-metadata-tables-exist?
  [pgvector index-metadata]
  (and (semantic.util/table-exists? pgvector (:metadata-table-name index-metadata))
       (semantic.util/table-exists? pgvector (:control-table-name index-metadata))))

(defn get-active-index-state
  "Returns the currently active index configuration, or nil if none active.

  Expect a map with:
  - :index
     The indexes parameters, table-name, model etc, that you can use with functions taking `index` in the semantic search package)
  - :metadata-row
     The metadata record for this index, as returned by next.jdbc. Keys are unqualified."
  [pgvector index-metadata]
  (when (control-and-metadata-tables-exist? pgvector index-metadata)
    (let [{:keys [metadata-table-name
                  control-table-name]}
          index-metadata
          ;; Returns nil if no active index is set (active_id is null).
          active-row-sql      (-> {:select [:m.*]
                                   :from   [[(keyword control-table-name)  :c]]
                                   :join   [[(keyword metadata-table-name) :m] [:= :m.id :c.active_id]]}
                                  (sql/format :quoted true))
          active-row          (jdbc/execute-one! pgvector active-row-sql {:builder-fn jdbc.rs/as-unqualified-lower-maps})]
      (when active-row
        {:index (row->index active-row)
         :metadata-row active-row}))))

(defn activate-index!
  "Sets the specified index as active by updating the control table. The index-id is the `id` column value for
  the indexes metadata row."
  [pgvector index-metadata index-id]
  (assert index-id)
  (assert (nat-int? index-id) (format "expected an integer id (hint do not pass the index map!)"))
  (let [{:keys [control-table-name]} index-metadata
        ;; only one row, no traditional WHERE for now
        activation-sql (-> {:update (keyword control-table-name)
                            :set    {:active_id         index-id
                                     :active_updated_at [:now]}}
                           (sql/format :quoted true))]
    (jdbc/execute! pgvector activation-sql)
    nil))

(defn find-compatible-index!
  "Locates a compatible existing index for the given embedding model.

  Returns information about the compatible index if found, or nil if no compatible index exists.

  Returns map with:
  - :index              the index parameters, table name, version etc.
  - :metadata-row       the unqualified next.jdbc row
  - :index-table-exists whether the table named by the `:index` actually exists in the database
  - :active             whether this index is currently active

  Returns nil if no compatible index is found."
  [pgvector index-metadata embedding-model]
  (let [{:keys [metadata-table-name
                control-table-name]}
        index-metadata

        {:keys [provider
                model-name
                vector-dimensions]}
        embedding-model

        model-rows-sql (-> {:select [:m.* [[:coalesce [:= :c.active_id :m.id] false] :is_active]]
                            :from   [[(keyword control-table-name) :c]
                                     [(keyword metadata-table-name) :m]]
                            :where  [:and
                                     [:= :provider provider]
                                     [:= :model_name model-name]
                                     [:= :vector_dimensions vector-dimensions]]
                            :order-by [[:index_created_at :desc]]}
                           (sql/format :quoted true))
        model-rows     (jdbc/execute! pgvector model-rows-sql {:builder-fn jdbc.rs/as-unqualified-lower-maps})

        {[active] true
         inactive false}
        (u/group-by :is_active #(dissoc % :is_active) model-rows)]
    ;; Priority order: active matching index > inactive matching index > new
    ;; This preserves existing data when possible and avoids unnecessary index creation.
    (cond
      ;; already active index matching model, return it.
      active
      (let [index (row->index active)]
        {:index              index
         :index-table-exists (index-table-exists? pgvector index)
         :metadata-row       active
         :active             true})

      ;; has an inactive index
      (seq inactive)
      (let [best-index-row (first inactive)
            best-index     (row->index best-index-row)]
        {:index              best-index
         :index-table-exists (index-table-exists? pgvector best-index)
         :metadata-row       best-index-row
         :active             false})

      ;; no compatible index found
      :else
      nil)))

(defn create-new-index-spec
  "Creates a new index specification for the given embedding model.
  This is used when no compatible index exists and a new one needs to be created."
  [pgvector index-metadata embedding-model]
  (let [index (qualify-index (semantic.index/default-index embedding-model) index-metadata)]
    {:index              index
     :index-table-exists (index-table-exists? pgvector index)
     :active             false}))

(defn record-new-index-table!
  "Records an index in the metadata table and returns its assigned ID.
  The indexes :table-name must be unique, or you will receive a constraint violation."
  [pgvector index-metadata index]
  (let [{:keys [metadata-table-name]}
        index-metadata

        {index-table-name :table-name
         index-version    :version
         :keys            [embedding-model]}
        index

        {:keys [provider
                model-name
                vector-dimensions]}
        embedding-model

        insert-sql (-> (sql.helpers/insert-into (keyword metadata-table-name))
                       (sql.helpers/values [{:provider          provider
                                             :model_name        model-name
                                             :vector_dimensions vector-dimensions
                                             :table_name        index-table-name
                                             :index_version     index-version
                                             :index_created_at  [:now]}])
                       (sql.helpers/returning :id)
                       (sql/format :quoted true))
        {:keys [id]} (jdbc/execute-one! pgvector insert-sql {:builder-fn jdbc.rs/as-unqualified-lower-maps})]
    id))

(comment
  (def pgvector ((requiring-resolve 'metabase-enterprise.semantic-search.env/get-pgvector-datasource!)))
  (def index-metadata ((requiring-resolve 'metabase-enterprise.semantic-search.env/get-index-metadata)))
  (def embedding-model ((requiring-resolve 'metabase-enterprise.semantic-search.env/get-configured-embedding-model)))
  (create-tables-if-not-exists! pgvector index-metadata)
  (ensure-control-row-exists! pgvector index-metadata)
  (drop-tables-if-exists! pgvector index-metadata)
  (get-active-index-state pgvector index-metadata)
  (-> (get-active-index-state pgvector index-metadata)
      :metadata-row
      :indexer_last_poll
      class))
